'''
@Author: Zhengkun Tian
@Email: zhengkun.tian@outlook.com
@Date: 2020-07-04 00:20:33
@LastEditTime: 2020-07-04 00:25:46
@FilePath: OpenASR/oasr/data/__init__.py
'''
import os

# Reserved vocabulary indices. BLK and PAD deliberately share index 0, and
# BOS and EOS deliberately share index 1; UNK takes index 2.
BLK = PAD = 0
BOS = EOS = 1
UNK = 2

# String forms of the reserved entries. Begin- and end-of-sequence use the
# same '<S/E>' symbol, matching the shared BOS/EOS index above.
BOS_TOKEN = EOS_TOKEN = '<S/E>'
PAD_TOKEN = '<PAD>'
UNK_TOKEN = '<UNK>'
SPACE_TOKEN = '<SPACE>'

# NOTE(review): not referenced anywhere in this file; presumably a count of
# spare vocabulary slots consumed elsewhere in the package -- confirm.
NUM_UNUSED_TOKENS = 10

def load_vocab(vocab_file):
    """Build the unit -> index mapping from a vocabulary file.

    The mapping always starts with the three reserved entries
    ``'<PAD>': 0``, ``'<S/E>': 1`` and ``'<UNK>': 2`` (the same values as
    the module-level PAD/BOS/EOS/UNK constants). Every entry read from the
    file is then given the next free index, starting at 3, in file order.

    Each non-blank line must contain exactly two whitespace-separated
    fields: the unit, followed by a second column that is ignored. Blank
    or whitespace-only lines (for example a trailing empty line) are
    skipped and do not consume an index. If a unit occurs more than once,
    the index of its last occurrence wins.

    Args:
        vocab_file: Path of the UTF-8 encoded vocabulary file.

    Returns:
        dict: Mapping from unit string to integer index.

    Raises:
        ValueError: If a non-blank line does not have exactly two fields.
    """
    unit2idx = {'<PAD>': 0, '<S/E>': 1, '<UNK>': 2}
    idx = 3  # first index after the reserved entries
    with open(vocab_file, 'r', encoding='utf-8') as v:
        for lineno, line in enumerate(v, start=1):
            fields = line.split()
            if not fields:
                # Tolerate empty lines instead of failing on tuple unpacking.
                continue
            if len(fields) != 2:
                raise ValueError(
                    '{}:{}: expected "<unit> <value>", got {!r}'.format(
                        vocab_file, lineno, line.rstrip('\n')))
            unit2idx[fields[0]] = idx
            idx += 1
    return unit2idx


def load_idx2unit_map(vocab_file):
    """Return the index -> unit mapping for a vocabulary file.

    The file is read with ``load_vocab`` and the resulting unit -> index
    dictionary is inverted.
    """
    idx2unit = {}
    for unit, idx in load_vocab(vocab_file).items():
        idx2unit[idx] = unit
    return idx2unit
